import requests
import re

class Html_Analysis():
    """Download a page and extract media URLs from its raw source text.

    The extraction relies on a pattern found by inspecting the page source:
    the video URL is the ``baseUrl`` value that follows the entry with id 80
    (the first such ``baseUrl`` is the best quality), and the audio URL is
    the ``baseUrl`` value that follows the entry with id 30216. Pages that
    do not contain the id-80 entry fall back to the first ``"url":"http:...``
    value (see ``Analysis_2``).
    """

    # Minimal browser-like request headers used to fetch the page source.
    ua_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"}
    html = ""

    # Text markers used to locate the URLs inside the page source.
    _VIDEO_ID_MARKER = '"id":80,"'
    _AUDIO_ID_MARKER = '"id":30216,"'
    # Key text expected right after an id marker; only its length is used,
    # to skip ahead to the first character of the URL.
    _BASE_URL_KEY = 'baseUrl":"'
    # Text that terminates a baseUrl value.
    _BASE_URL_END = '","base_url"'
    # Fallback marker; the URL itself starts at the "http:" part of it.
    _PLAIN_URL_MARKER = '"url":"http:'
    _PLAIN_URL_SKIP = len('"url":"')

    def __init__(self, myurl="", timeout=30):
        """Fetch ``myurl`` and keep its UTF-8 decoded body.

        Args:
            myurl: Address of the page to download. It is passed to
                ``requests.get`` unchanged, so an empty or invalid URL is
                rejected by ``requests``.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block forever.

        Raises:
            UnicodeDecodeError: If the response body is not valid UTF-8.
        """
        req = requests.get(url=myurl, headers=self.ua_headers, timeout=timeout)
        self.html = req.content.decode("utf-8")
        # Reference to the page source as originally downloaded.
        self.ori_html = self.html

    @staticmethod
    def _cut_until(text, start, terminator, what):
        """Return ``text[start:stop]`` where ``stop`` is the next ``terminator``."""
        stop = text.find(terminator, start)
        if stop == -1:
            raise ValueError("end of %s URL not found in page source" % what)
        return text[start:stop]

    def Analysis_2(self):
        """Extract the first ``"url":"http:...`` value from the page source.

        ``self.html`` is left untouched, so the method can be called
        repeatedly with the same result.

        Returns:
            A one-element list holding the URL.

        Raises:
            ValueError: If the marker or the closing quote is missing.
        """
        page = self.html
        marker_at = page.find(self._PLAIN_URL_MARKER)
        if marker_at == -1:
            raise ValueError("no media URL found in page source")
        url_start = marker_at + self._PLAIN_URL_SKIP
        return [self._cut_until(page, url_start, '"', "media")]

    def Analysis(self):
        """Extract the video and audio stream URLs from the page source.

        Returns:
            ``[video_url, audio_url]`` when the id-80 entry is present,
            otherwise the one-element result of ``Analysis_2``. ``None`` is
            returned (after printing ``error``) when ``self.html`` is
            ``None``.

        Raises:
            ValueError: If an expected marker or terminator is missing.
        """
        page = self.html
        if page is None:
            print("error")
            return None

        video_at = page.find(self._VIDEO_ID_MARKER)
        if video_at == -1:
            # No id-80 entry: fall back to the plain "url" pattern.
            return self.Analysis_2()

        audio_at = page.find(self._AUDIO_ID_MARKER)
        if audio_at == -1:
            raise ValueError("audio stream marker not found in page source")

        # Skip the id marker plus the fixed-length baseUrl key that is
        # expected to follow it, landing on the first character of the URL.
        key_len = len(self._BASE_URL_KEY)
        video_start = video_at + len(self._VIDEO_ID_MARKER) + key_len
        audio_start = audio_at + len(self._AUDIO_ID_MARKER) + key_len

        video_url = self._cut_until(page, video_start, self._BASE_URL_END, "video")
        audio_url = self._cut_until(page, audio_start, self._BASE_URL_END, "audio")

        # Kept as attributes as well, matching the values returned.
        self.video_html = video_url
        self.voice_html = audio_url
        return [video_url, audio_url]

